Source: Replication Data for “Disparities in PM2.5 air pollution in the United States”
This data shows air pollution at each census tract. It specifically focuses on concentrations of PM2.5, meaning fine particulate matter that is less than 2.5 micrometers in diameter. PM2.5 concentrations are measured by the number of micrograms per cubic meter. It shows PM2.5 concentrations at every year from 1981-2016. However, we focus only on 1981 and 2016 and the changes between the two years.
glimpse(airquality)
## Rows: 50
## Columns: 9
## $ trtid10 <dbl> 51003010100, 51003010201, 51003010202, 51003010300, 51…
## $ pm2_5_1981 <dbl> 22.424961, 13.811016, 10.633014, 24.722347, 14.738005,…
## $ pm2_5_2016 <dbl> 6.137776, 3.962960, 3.051047, 7.119418, 4.167065, 2.79…
## $ percentile_1981 <dbl> 54.264661, 22.431369, 14.253857, 64.884069, 24.607330,…
## $ percentile_2016 <dbl> 38.138176, 17.884152, 10.742680, 52.455747, 19.704147,…
## $ STATEFP00 <dbl> 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51…
## $ COUNTYFP00 <chr> "003", "003", "003", "003", "003", "003", "003", "003"…
## $ PM_change <dbl> -16.287185, -9.848057, -7.581967, -17.602929, -10.5709…
## $ pctile_change <dbl> -16.126485, -4.547217, -3.511178, -12.428322, -4.90318…
Observations are census tract estimates of:
PM2_5_1981 and PM2_5_2016)percentile_1981 and percentile_2016)PM_change)pctile_change)Five-number summaries of all variables:
airquality %>% select(-c(trtid10, STATEFP00, COUNTYFP00)) %>%
select(where(~is.numeric(.x) && !is.na(.x))) %>%
as.data.frame() %>%
stargazer(., type = "text", title = "Summary Statistics", digits = 0,
summary.stat = c("mean", "sd", "min", "median", "max"))
##
## Summary Statistics
## ============================================
## Statistic Mean St. Dev. Min Median Max
## --------------------------------------------
## pm2_5_1981 21 9 5 24 62
## pm2_5_2016 6 3 1 7 17
## percentile_1981 52 27 4 63 100
## percentile_2016 43 25 3 49 99
## PM_change -15 7 -45 -17 -4
## pctile_change -9 4 -16 -9 -0
## --------------------------------------------
Visual representations of the data:
airquality %>% select(trtid10, percentile_1981, percentile_2016) %>%
pivot_longer(-trtid10, names_to = "measure", values_to = "value") %>%
ggplot(aes(x = value, fill = measure)) +
geom_histogram() +
facet_wrap(~measure, scales = "free") +
xlim(0,100)
airquality %>%
ggplot() +
geom_point(aes(x=percentile_1981, y=percentile_2016)) +
xlim(0, 100) +
ylim(0, 100)
airquality %>%
ggplot() +
geom_point(aes(x=percentile_1981, y=pctile_change)) +
xlim(0, 100)
airquality %>%
ggplot() +
geom_point(aes(x=percentile_2016, y=pctile_change)) +
xlim(0, 100)
pal <- colorNumeric("plasma", reverse = TRUE, domain = cvilleshapes$percentile_1981)
leaflet(cvilleshapes) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = cvilleshapes,
fillColor = ~pal(percentile_1981),
weight = 1,
opacity = 1,
color = "white",
fillOpacity = 0.6,
highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
popup = paste0("Tract Number: ", cvilleshapes$GEOID, "<br>",
"Percentile: ", round(cvilleshapes$percentile_1981, 2))) %>%
addLegend("bottomright", pal = pal, values = cvilleshapes$percentile_1981,
title = "Percentile, 1981", opacity = 0.7)
pal <- colorNumeric("plasma", reverse = TRUE, domain = cvilleshapes$percentile_2016)
leaflet(cvilleshapes) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = cvilleshapes,
fillColor = ~pal(percentile_2016),
weight = 1,
opacity = 1,
color = "white",
fillOpacity = 0.6,
highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
popup = paste0("Tract Number: ", cvilleshapes$GEOID, "<br>",
"Percentile: ", round(cvilleshapes$percentile_2016, 2))) %>%
addLegend("bottomright", pal = pal, values = cvilleshapes$percentile_2016,
title = "Percentile, 2016", opacity = 0.7)
pal <- colorNumeric("plasma", reverse = TRUE, domain = cvilleshapes$pctile_change)
leaflet(cvilleshapes) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = cvilleshapes,
fillColor = ~pal(pctile_change),
weight = 1,
opacity = 1,
color = "white",
fillOpacity = 0.6,
highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
popup = paste0("Tract Number: ", cvilleshapes$GEOID, "<br>",
"Percentile Change: ", round(cvilleshapes$pctile_change, 2))) %>%
addLegend("bottomright", pal = pal, values = cvilleshapes$pctile_change,
title = "Percentile Change, 1981-2016", opacity = 0.7)
pal <- colorNumeric("plasma", reverse = TRUE, domain = cvilleshapes$pm2_5_2016)
leaflet(cvilleshapes) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = cvilleshapes,
fillColor = ~pal(pm2_5_2016),
weight = 1,
opacity = 1,
color = "white",
fillOpacity = 0.6,
highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
popup = paste0("Tract Number: ", cvilleshapes$GEOID, "<br>",
"Concentration: ", cvilleshapes$pm2_5_2016)) %>%
addLegend("bottomright", pal = pal, values = cvilleshapes$pm2_5_2016,
title = "PM2.5 Concentration, 2016", opacity = 0.7)
pal <- colorNumeric("plasma", reverse = TRUE, domain = cvilleshapes$PM_change)
leaflet(cvilleshapes) %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = cvilleshapes,
fillColor = ~pal(PM_change),
weight = 1,
opacity = 1,
color = "white",
fillOpacity = 0.6,
highlight = highlightOptions(weight = 2, fillOpacity = 0.8, bringToFront = T),
popup = paste0("Tract Number: ", cvilleshapes$GEOID, "<br>",
"PM2.5 Change: ", round(cvilleshapes$PM_change, 2))) %>%
addLegend("bottomright", pal = pal, values = cvilleshapes$PM_change,
title = "Change in PM2.5, 1981-2016", opacity = 0.7)
The original data uses 2000 census tracts, since that is roughly the midpoint of their 1981-2016 time frame. Since we want to be able to look at this in relation to other tract-level data, we needed this in 2010 census tracts. To estimate the data for 2010 census tracts, I used the bridging data found here.
Since the conversion to 2010 census tracts involved some estimation, the PM2.5 levels in the data may not be as accurate as the original 2000 data.